# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df_to_explore.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 9134 entries, 0 to 9133 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Customer 9134 non-null object 1 State 9134 non-null object 2 Customer Lifetime Value 9134 non-null float64 3 Response 9134 non-null object 4 Coverage 9134 non-null object 5 Education 9134 non-null object 6 Effective To Date 9134 non-null datetime64[ns] 7 EmploymentStatus 9134 non-null object 8 Gender 9134 non-null object 9 Income 9134 non-null float64 10 Location Code 9134 non-null object 11 Marital Status 9134 non-null object 12 Monthly Premium Auto 9134 non-null float64 13 Months Since Last Claim 9134 non-null float64 14 Months Since Policy Inception 9134 non-null float64 15 Number of Open Complaints 9134 non-null float64 16 Number of Policies 9134 non-null float64 17 Policy Type 9134 non-null object 18 Policy 9134 non-null object 19 Renew Offer Type 9134 non-null object 20 Sales Channel 9134 non-null object 21 Total Claim Amount 9134 non-null float64 22 Vehicle Class 9134 non-null object 23 Vehicle Size 9134 non-null object dtypes: datetime64[ns](1), float64(8), object(15) memory usage: 1.7+ MB
# Grid of pairwise relationships between the numeric columns of the frame.
sns.pairplot(data=df_to_explore)
<seaborn.axisgrid.PairGrid at 0x7fdb86456cd0>
# Same pairwise grid, with points coloured by the "Sales Channel" column.
sns.pairplot(data=df_to_explore,hue="Sales Channel")
<seaborn.axisgrid.PairGrid at 0x7fdb85e33650>
# Same pairwise grid, coloured by the "Gender" column using the "husl" palette.
sns.pairplot(data=df_to_explore,hue="Gender",palette="husl")
<seaborn.axisgrid.PairGrid at 0x7fdb7670d610>
# Number of rows for every (sales channel, state, response) combination.
# "Gender" only serves as the column whose non-null entries are counted.
channel_state_response = df_to_explore.groupby(["Sales Channel", "State", "Response"])
channel_state_response.agg(Total=("Gender", "count"))
| Sales Channel | State | Response | Total |
|---|---|---|---|
| Agent | Arizona | No | 518 |
| | | Yes | 125 |
| | California | No | 981 |
| | | Yes | 220 |
| | Nevada | No | 267 |
| | | Yes | 63 |
| | Oregon | No | 787 |
| | | Yes | 201 |
| | Washington | No | 258 |
| | | Yes | 57 |
| Branch | Arizona | No | 422 |
| | | Yes | 47 |
| | California | No | 788 |
| | | Yes | 112 |
| | Nevada | No | 219 |
| | | Yes | 28 |
| | Oregon | No | 639 |
| | | Yes | 80 |
| | Washington | No | 205 |
| | | Yes | 27 |
| Call Center | Arizona | No | 313 |
| | | Yes | 40 |
| | California | No | 530 |
| | | Yes | 75 |
| | Nevada | No | 148 |
| | | Yes | 15 |
| | Oregon | No | 447 |
| | | Yes | 50 |
| | Washington | No | 135 |
| | | Yes | 12 |
| Web | Arizona | No | 207 |
| | | Yes | 31 |
| | California | No | 395 |
| | | Yes | 49 |
| | Nevada | No | 124 |
| | | Yes | 18 |
| | Oregon | No | 352 |
| | | Yes | 45 |
| | Washington | No | 91 |
| | | Yes | 13 |
# Response counts per state, drawn as a bar chart with one series per Response value.
response_by_state = df_to_explore.groupby(["State", "Response"]).agg(
    Total=("Gender", "count")
)
# Move the Response index level into the columns, then keep the "Total" sub-frame.
response_by_state_wide = response_by_state.unstack(1)["Total"]
response_by_state_wide.plot(kind="bar")
plt.show()
# Share of each Response value within every "Renew Offer Type", in percent.
# Step 1: row counts per (offer, response); "Gender" is only the column being counted.
df_tmp = df_to_explore.groupby(["Renew Offer Type", "Response"]).agg(
    Percentage=("Gender", "count")
)
# Step 2: per-offer totals, broadcast back onto every (offer, response) row.
# transform("sum") keeps the original two-level index, whereas
# groupby(...).apply(...) prepends the group key again on pandas >= 2.0, and
# float(<one-element Series>) is deprecated there.
offer_totals = df_tmp.groupby(level=0).transform("sum")
100 * df_tmp / offer_totals
| Renew Offer Type | Response | Percentage |
|---|---|---|
| Offer1 | No | 84.168443 |
| | Yes | 15.831557 |
| Offer2 | No | 76.623377 |
| | Yes | 23.376623 |
| Offer3 | No | 97.905028 |
| | Yes | 2.094972 |
| Offer4 | No | 100.000000 |
# "Yes" responders only: counts per vehicle class, one bar series per renew offer type.
yes_rows = df_to_explore[df_to_explore["Response"] == "Yes"]
yes_by_class_and_offer = yes_rows.groupby(["Vehicle Class", "Renew Offer Type"]).agg(
    Total=("Gender", "count")
)
yes_by_class_and_offer["Total"].unstack().plot(kind="bar")
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb6fc90390>
# "Yes" responders only: counts per renew offer type, one bar series per vehicle class.
yes_rows = df_to_explore[df_to_explore["Response"] == "Yes"]
yes_by_offer_and_class = yes_rows.groupby(["Renew Offer Type", "Vehicle Class"]).agg(
    Total=("Gender", "count")
)
yes_by_offer_and_class["Total"].unstack().plot(kind="bar")
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb6fbd9dd0>
# One subplot per sales channel: percentage of each Response value within
# every vehicle size, with a red dashed reference line at 20%.
plt.figure(figsize=(9, 7))
# NOTE: the 2x2 grid only has room for four distinct "Sales Channel" values.
for k, channel in enumerate(df_to_explore["Sales Channel"].unique(), start=1):
    ax = plt.subplot(2, 2, k)
    # Row counts per (vehicle size, response) for this channel;
    # "Gender" is only the column being counted.
    df_tmp = (
        df_to_explore[df_to_explore["Sales Channel"] == channel]
        .groupby(["Vehicle Size", "Response"])
        .agg(Total=("Gender", "count"))
    )
    # Convert counts to percentages of the per-vehicle-size total.
    # transform("sum") keeps the index unchanged across pandas versions
    # (groupby.apply prepends the group key again on pandas >= 2.0).
    size_totals = df_tmp.groupby(level=0).transform("sum")
    pct = 100 * df_tmp / size_totals
    pct["Total"].unstack().plot(
        kind="bar", ax=ax, rot=45, title="% " + channel
    )
    # Draw on this subplot explicitly rather than on the implicit current axes.
    ax.axhline(y=20, color="red", linestyle="--")
plt.tight_layout()
plt.show()
# One subplot per "CLV Range" value: response counts per "Months range".
# NOTE(review): "CLV Range" and "Months range" are presumably derived columns
# created in an earlier cell - confirm they exist on df_to_explore.
plt.figure(figsize=(10, 7))
# NOTE: the 2x2 grid only has room for four distinct "CLV Range" values.
for k, clv_range in enumerate(df_to_explore["CLV Range"].unique(), start=1):
    ax = plt.subplot(2, 2, k)
    in_range = df_to_explore[df_to_explore["CLV Range"] == clv_range]
    # Row counts per (months range, response); "Gender" is only the column being counted.
    counts = in_range.groupby(["Months range", "Response"]).agg(
        Total=("Gender", "count")
    )
    counts["Total"].unstack().plot(kind="bar", ax=ax, title=clv_range)
plt.tight_layout()
plt.show()
# Dendrogram of X built with Ward linkage.
# NOTE(review): `shc` is presumably scipy.cluster.hierarchy - confirm against the imports.
ward_links = shc.linkage(X, method="ward")

fig, ax = plt.subplots(1, 1, figsize=(20, 12))
ax.set_title("Customer Dendograms")
dend = shc.dendrogram(ward_links, ax=ax)
# Dashed horizontal reference line at height 40 on the dendrogram.
ax.axhline(y=40, linewidth=3, linestyle="--", color="purple")
plt.show()
# Per-cluster column means, restricted to rows where Engaged == 0.
not_engaged = x[x["Engaged"] == 0]
not_engaged.groupby("Cluster").mean()
| Cluster | Customer Lifetime Value | Engaged | Months Since Policy Inception | Income |
|---|---|---|---|---|
| 0 | 21106.615763 | 0 | 50.148246 | 33339.967544 |
| 1 | 5503.350598 | 0 | 33.446910 | 17958.721748 |
| 2 | 5764.989344 | 0 | 82.001679 | 16047.240974 |
| 4 | 6283.256747 | 0 | 31.529412 | 71440.180000 |
| 5 | 5905.842851 | 0 | 80.243649 | 72029.667436 |
# Pairwise plots of the selected columns for the Engaged == 0 rows, coloured by "Cluster".
pairplot_columns = [
    "Customer Lifetime Value",
    "Months Since Policy Inception",
    "Income",
    "Cluster",
]
sns.pairplot(x.loc[x["Engaged"] == 0, pairplot_columns], hue="Cluster")
<seaborn.axisgrid.PairGrid at 0x7fdb6760ebd0>
# Per-cluster column means, restricted to rows where Engaged == 1.
engaged = x[x["Engaged"] == 1]
engaged.groupby("Cluster").mean()
| Cluster | Customer Lifetime Value | Engaged | Months Since Policy Inception | Income |
|---|---|---|---|---|
| 3 | 7854.871361 | 1 | 48.266055 | 38544.027523 |
# Pairwise plots of the selected columns for the Engaged == 1 rows, coloured by "Cluster".
pairplot_columns = [
    "Customer Lifetime Value",
    "Months Since Policy Inception",
    "Income",
    "Cluster",
]
sns.pairplot(x.loc[x["Engaged"] == 1, pairplot_columns], hue="Cluster")
<seaborn.axisgrid.PairGrid at 0x7fdb6594d8d0>
# Mean of every remaining column of x within each "Cluster" label.
x.groupby("Cluster").mean()
| Cluster | Customer Lifetime Value | Months Since Policy Inception | Income |
|---|---|---|---|
| 0 | 5780.888013 | 64.704469 | 13146.379647 |
| 1 | 8421.708920 | 78.221932 | 68111.397737 |
| 2 | 6358.995640 | 32.450242 | 66039.783092 |
| 3 | 5579.256171 | 14.809723 | 17938.300084 |
| 4 | 19687.917343 | 33.057922 | 25544.524702 |
| 5 | 38999.543684 | 57.682927 | 43784.981707 |
# Pairwise plots of the columns of x, coloured by "Cluster".
sns.pairplot(x,hue="Cluster")
<seaborn.axisgrid.PairGrid at 0x7fdb661a6f90>

# Mean of every remaining column of x within each "Cluster" label.
# NOTE(review): the output below shows two clusters, so x["Cluster"] was presumably
# reassigned in a cell not shown here - confirm.
x.groupby("Cluster").mean()
| Cluster | Customer Lifetime Value | Months Since Policy Inception | Income |
|---|---|---|---|
| 0 | 8564.501208 | 48.576642 | 20288.919708 |
| 1 | 6654.633226 | 47.740741 | 69419.950617 |
# Pairwise plots of the columns of x, coloured by "Cluster".
sns.pairplot(x,hue="Cluster")
<seaborn.axisgrid.PairGrid at 0x7fdb64ed94d0>
